package org.elastic.compreplatform.crawler.core.processor;

import java.io.File;
import java.util.List;
import java.util.Map;

import org.elastic.compreplatform.common.util.SpringContextUtil;
import org.elastic.compreplatform.crawler.controller.CraIpProxyController;
import org.elastic.compreplatform.crawler.core.elasticsearch.ESqlTemplate;
import org.elastic.compreplatform.crawler.core.thread.TaskThreadPoolExecutor;
import org.elastic.compreplatform.crawler.core.thread.task.XvideosPolitlinkThreadTask;
import org.elastic.compreplatform.crawler.core.thread.task.XvideosRealLinkThreadTask;
import org.elastic.compreplatform.crawler.model.CraIpProxy;
import org.elastic.compreplatform.crawler.model.XvideosResultMapper;
import org.elastic.compreplatform.crawler.util.AnalyUtil;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.baomidou.mybatisplus.toolkit.StringUtils;
import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlPage;

/**
 * Static helpers that fetch xvideos listing ("pilot") pages and video pages and hand the
 * parsed elements to worker tasks via {@link TaskThreadPoolExecutor}.
 *
 * <p>All methods are static; the class keeps no state of its own.
 *
 * @author JornTang
 */
public class XvideosWebHtmlProcessor {
	private static final Logger log = LoggerFactory.getLogger(XvideosWebHtmlProcessor.class);

	/** Site root that relative pilot links are appended to. */
	private static final String BASE_URL = "https://www.xvideos.com";
	/** User-Agent header sent with every jsoup request. */
	private static final String USER_AGENT = "Mozilla";
	/** Total number of fetch attempts per URL (the first try plus three retries). */
	private static final int MAX_FETCH_ATTEMPTS = 4;
	/** Upper bound (inclusive) of the random argument passed to {@code getRandomProxy}. */
	private static final int PROXY_RANK_RANGE = 20;
	/** Fixed proxy used only by the {@link #jsoup(String)} debugging helper. */
	private static final String DEBUG_PROXY_HOST = "122.13.248.215";
	private static final int DEBUG_PROXY_PORT = 8888;

	/**
	 * Scratch entry point; it only prints the platform file separator to stderr.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		System.err.println(File.separator);
	}

	/**
	 * Crawls the pilot (listing) pages for the two built-in search keywords.
	 *
	 * <p>Page index 0 is requested as {@code p=top}; indexes 1 to {@code pagenum} are requested
	 * by number, so {@code pagenum + 1} pages are fetched per keyword.
	 *
	 * @param pagenum highest numeric page index to crawl (inclusive)
	 * @author JornTang
	 * @date 2018-03-08
	 */
	public static void excuteXvideosPilotAnaly(int pagenum) {
		for (int page = 0; page <= pagenum; page++) {
			String p = (page == 0) ? "top" : String.valueOf(page);
			XvideosPilotAnaly(BASE_URL + "/?k=japanese&p=" + p);
			XvideosPilotAnaly(BASE_URL + "/?k=japanese+mom&p=" + p);
		}
	}

	/**
	 * Fetches one pilot page and submits its {@code thumb-block} elements to the thread pool.
	 *
	 * <p>The page is requested up to {@value #MAX_FETCH_ATTEMPTS} times. Only a failed fetch is
	 * retried; a failure after the page has been downloaded is logged and ends the call.
	 *
	 * @param url absolute URL of the listing page
	 * @author JornTang
	 * @date 2018-03-07
	 */
	public static void XvideosPilotAnaly(String url) {
		// The proxy is looked up once and reused for every attempt.
		CraIpProxy proxy = AnalyUtil.getCraIpProxy();
		for (int attempt = 1; attempt <= MAX_FETCH_ATTEMPTS; attempt++) {
			boolean fetched = false;
			try {
				Connection conn = Jsoup.connect(url).userAgent(USER_AGENT);
				if (proxy != null) {
					conn.proxy(proxy.getProxyIp(), proxy.getProxyPort());
				}
				Document document = conn.get();
				fetched = true;
				Elements thumbBlocks = document.getElementsByAttributeValue("class", "thumb-block");
				// Analysis runs on the shared thread pool.
				TaskThreadPoolExecutor.addTaskToThreadPool(new XvideosPolitlinkThreadTask(thumbBlocks));
				return;
			} catch (Exception e) {
				log.error("引导连接抓取分析, url={}", url, e);
				if (fetched) {
					// The download succeeded, so retrying the request would not help.
					return;
				}
			}
		}
	}

	/**
	 * Pages through every {@code xvideos} record whose {@code handle_type} is not 2 and runs
	 * {@link #XvideosRealLinkAnaly(Map, String)} for each record that has a {@code polit_link}.
	 *
	 * <p>Paging is iterative: after each page the bulk counter is incremented and the next
	 * offset is {@code limit * bulk}. The loop stops when the search returns {@code null} or a
	 * page without hits. Any exception is logged and ends the run.
	 *
	 * <p>NOTE(review): the query filters on {@code handle_type}; if the submitted tasks update
	 * that field while paging is in progress, offset-based paging may skip rows. Confirm
	 * against the task implementation.
	 *
	 * @param offset offset of the first page
	 * @param limit page size
	 * @param bulk number of pages already processed; used to derive the following offsets
	 * @author JornTang
	 * @date 2018-03-07
	 */
	public static void excuteAllLinks(int offset, int limit, int bulk) {
		int currentOffset = offset;
		int currentBulk = bulk;
		try {
			while (true) {
				XvideosResultMapper page = ESqlTemplate.searchBySql(
						"select * from xvideos where handle_type <> 2 limit " + currentOffset + "," + limit);
				if (page == null) {
					return;
				}
				Map<String, Object> hits = page.getHits();
				@SuppressWarnings("unchecked")
				List<Map<String, Object>> hitsList =
						hits != null ? (List<Map<String, Object>>) hits.get("hits") : null;
				if (hitsList == null || hitsList.isEmpty()) {
					// No more results: without this check paging would never terminate.
					return;
				}
				for (Map<String, Object> hit : hitsList) {
					analyseHit(hit);
				}
				currentBulk++;
				currentOffset = limit * currentBulk;
			}
		} catch (Exception e) {
			log.error("分析所有连接并获取实际视频连接异常", e);
		}
	}

	/** Runs the real-link analysis for one search hit, skipping hits without a usable link. */
	private static void analyseHit(Map<String, Object> hit) {
		if (hit == null) {
			return;
		}
		@SuppressWarnings("unchecked")
		Map<String, Object> source = (Map<String, Object>) hit.get("_source");
		if (source == null) {
			return;
		}
		// Check for null before converting: concatenating null would yield the text "null".
		Object politLink = source.get("polit_link");
		if (politLink == null) {
			return;
		}
		String path = politLink.toString();
		if (StringUtils.isNotEmpty(path)) {
			XvideosRealLinkAnaly(source, BASE_URL + path);
		}
	}

	/**
	 * Submits a real-link analysis task for one record.
	 *
	 * <p>When the record's {@code handle_type} is 1 the page is not downloaded and the task is
	 * submitted once with no script elements. Otherwise the page's {@code <script>} elements
	 * are fetched through a randomly chosen proxy, with up to {@value #MAX_FETCH_ATTEMPTS}
	 * attempts, and passed to the task. Failures are logged, never thrown, except for errors
	 * raised while looking up the proxy controller bean.
	 *
	 * @param source the record's {@code _source} map; {@code handle_type} is read from it
	 * @param url absolute URL of the video page
	 * @author JornTang
	 * @date 2018-03-07
	 */
	public static void XvideosRealLinkAnaly(Map<String, Object> source, String url) {
		int handleType;
		try {
			Object rawHandleType = source.get("handle_type");
			handleType = rawHandleType != null ? Integer.parseInt(rawHandleType.toString()) : -1;
		} catch (RuntimeException e) {
			// An unreadable handle_type cannot be fixed by retrying, so give up immediately.
			log.error("xvideos实际连接分析异常, url={}", url, e);
			return;
		}

		if (handleType == 1) {
			try {
				// Typed local keeps the constructor call unambiguous when passing null.
				Elements noScripts = null;
				TaskThreadPoolExecutor.addTaskToThreadPool(new XvideosRealLinkThreadTask(source, noScripts));
			} catch (Exception e) {
				log.error("xvideos实际连接分析异常, url={}", url, e);
			}
			return;
		}

		CraIpProxyController proxyController = SpringContextUtil.getBean(CraIpProxyController.class);
		for (int attempt = 1; attempt <= MAX_FETCH_ATTEMPTS; attempt++) {
			boolean fetched = false;
			try {
				// Random value in [1, PROXY_RANK_RANGE]; per the original comment this selects
				// one of the proxies with the shortest response times.
				int rank = 1 + (int) (Math.random() * PROXY_RANK_RANGE);
				CraIpProxy proxy = proxyController.getRandomProxy(rank);
				Connection conn = Jsoup.connect(url).userAgent(USER_AGENT);
				if (proxy != null) {
					conn.proxy(proxy.getProxyIp(), proxy.getProxyPort());
				}
				Document document = conn.get();
				fetched = true;
				Elements scripts = document.getElementsByTag("script");
				// Analysis runs on the shared thread pool.
				TaskThreadPoolExecutor.addTaskToThreadPool(new XvideosRealLinkThreadTask(source, scripts));
				return;
			} catch (Exception e) {
				log.error("xvideos实际连接分析异常, url={}", url, e);
				if (fetched) {
					// The download succeeded, so retrying the request would not help.
					return;
				}
			}
		}
	}

	/**
	 * Debugging helper: fetches a page with jsoup through a fixed proxy and prints its
	 * {@code <script>} elements to stderr. Failures are logged.
	 *
	 * @param url absolute URL of the page to fetch
	 * @author JornTang
	 * @date 2018-02-10
	 */
	public static void jsoup(String url) {
		try {
			Connection conn = Jsoup.connect(url).userAgent(USER_AGENT);
			conn.proxy(DEBUG_PROXY_HOST, DEBUG_PROXY_PORT);
			Document document = conn.get();
			Elements scripts = document.getElementsByTag("script");
			System.err.println(scripts);
		} catch (Exception e) {
			log.error("使用jsoup抓取网页异常, url={}", url, e);
		}
	}

	/**
	 * Debugging helper: loads a page with HtmlUnit (JavaScript enabled), waits for background
	 * scripts, and prints the resulting document to stderr. Failures are logged.
	 *
	 * @param url absolute URL of the page to load
	 * @param type currently unused; kept so existing callers keep compiling
	 * @author JornTang
	 * @date 2018-02-10
	 */
	public static void htmlunit(String url, String type) {
		final WebClient webClient = new WebClient(BrowserVersion.CHROME);
		try {
			// Run the page's JavaScript.
			webClient.getOptions().setJavaScriptEnabled(true);
			// Skip CSS so no extra requests are made for rendering.
			webClient.getOptions().setCssEnabled(false);
			// Follow redirects.
			webClient.getOptions().setRedirectEnabled(true);
			// Do not throw when a script on the page fails.
			webClient.getOptions().setThrowExceptionOnScriptError(false);
			// Timeout: three minutes.
			webClient.getOptions().setTimeout(3 * 60 * 1000);
			HtmlPage htmlPage = webClient.getPage(url);
			// Give background JavaScript up to ten seconds to finish building the DOM.
			webClient.waitForBackgroundJavaScript(10000);
			Document doc = Jsoup.parse(htmlPage.asXml());
			System.err.println(doc.toString());
		} catch (Exception e) {
			log.error("htmlunit抓取网页异常, url={}", url, e);
		} finally {
			// Close all windows to release the client's resources.
			webClient.closeAllWindows();
		}
	}
}
